**********************************
*** Parallel Trends **************
*** Author: RK. ******************
*** Figure A1 (set up data)  *****
**********************************
**********************************


*****************************************
*	Setting Paths		                *
*****************************************

** SET THE WORKING DIRECTORY (cd) TO THE JOP Replication files folder on your computer; all file paths below are relative to it

***** PANEL A Figure A.1 (fertility)


** Load the eligible-women (EW) household DID dataset
use "DATA FILES TO SHARE/main_ew_hh_df.dta", clear

** Restrict to the second wave (year == 1, i.e. the wave for which we have
** the birth data) and to women with w1_abshusband_dummy equal to 0
keep if year == 1 & w1_abshusband_dummy == 0

* this is our DID sample: cross-tab of the wave-1 and wave-2 dummies
tabulate w1_abshusband_dummy w2_abshusband_dummy


** Generate the woman-level UID: a 13-character string made of the
** zero-padded state, district, PSU, household, split-household and person IDs
generate str13 uid = string(STATEID,"%02.0f") ///
    + string(DISTID,"%02.0f") ///
    + string(PSUID,"%02.0f") ///
    + string(HHID,"%03.0f") ///
    + string(HHSPLITID,"%02.0f") ///
    + string(PERSONID,"%02.0f")

** Check for duplicates: dup is 0 when the uid occurs once, otherwise it is
** the running position of the row within its uid group
quietly bysort uid: generate dup = cond(_N == 1, 0, _n)
tabulate dup
* No duplicates (the m:1 merge on uid further below relies on this)


** Store the sample in a tempfile so it can be merged with the birth data
tempfile ew_main
save `ew_main'

*** Load the raw IHDS birth history file for 2011-12
use "DATA FILES TO SHARE/IHDS_RAW/36151-0004-Data.dta", clear

sort STATEID DISTID PSUID HHID HHSPLITID EW3

** Build the same 13-character UID as in the EW sample; here EW3 supplies the
** final (person) component
generate str13 uid = string(STATEID,"%02.0f") ///
    + string(DISTID,"%02.0f") ///
    + string(PSUID,"%02.0f") ///
    + string(HHID,"%03.0f") ///
    + string(HHSPLITID,"%02.0f") ///
    + string(EW3,"%02.0f")

** Number the distinct UIDs; the max reported by summarize is the number of
** distinct women in the birth history file
egen ew_uid = group(uid)
summarize ew_uid


** The birth history file has one row per recorded birth, so a woman can
** appear several times: many birth rows are matched to one EW row
merge m:1 uid using `ew_main'


*** Count the distinct UIDs found in both files (_merge == 3)
egen matched_both = group(uid) if _merge == 3
summarize matched_both

** 23836 women in the EW df matched in both


** Count the distinct UIDs present only in the EW file (_merge == 2, using only)
egen unmatched_ewmain = group(uid) if _merge == 2
summarize unmatched_ewmain

** Count the distinct UIDs present only in the birth history file
** (_merge == 1, master only)
egen unmatched_birthdata = group(uid) if _merge == 1
summarize unmatched_birthdata

** only 453 UIDs in our EW data (out of 24289) were unmatched

** We want only those who were matched in both
keep if _merge == 3

** Expand the two-digit year codes stored in BH5B to four-digit years:
** codes 0 to 12 become 2000 to 2012 and codes 89 to 99 become 1989 to 1999.
** Each code is matched exactly, so any other value (including missing) is
** left untouched.
forvalues code = 0/12 {
    replace BH5B = 2000 + `code' if BH5B == `code'
}
forvalues code = 99(-1)89 {
    replace BH5B = 1900 + `code' if BH5B == `code'
}



**** Identify the birth year based on the approximate age at the time of the
**** interview: 2012 minus BH6A (presumably the age in years - confirm
**** against the IHDS codebook)

gen yr_birth = 2012 - BH6A

** Fall back on the recorded year BH5B whenever BH6A is missing.
** missing() is used instead of == . so that extended missing codes
** (.a to .z) also trigger the fallback; a comparison with . only matches the
** system missing value and would silently lose those births.
** NOTE(review): if BH6A does contain extended missing codes, more rows are
** retained than before and the counts reported further below may change.
replace yr_birth = BH5B if missing(BH6A)

**** Generate a new variable that identifies the number of years before
**** treatment (values at or below 0, measured relative to 2012)

gen pre_trt_yr = yr_birth - 2012

** Summarize BHED within each treatment group and pre-treatment year
sort w2_abshusband_dummy pre_trt_yr
by w2_abshusband_dummy pre_trt_yr: summarize BHED

** Drop births for which no year could be determined
drop if missing(pre_trt_yr)

** Count the women in the treatment and control groups of this birth
** history-EW dataset. dup_2 is 0 for a uid that occurs once, otherwise the
** running position of the row within its uid group.
quietly bysort uid: generate dup_2 = cond(_N == 1, 0, _n)
tabulate dup_2

** Flag exactly one row per woman: the only row (dup_2 == 0) or the first row
** of her group (dup_2 == 1)
generate unique = 1 if dup_2 <= 1

** This gives the group sizes that are used when making the graph
tabulate w2_abshusband_dummy if unique == 1

 /*
 w2_abshusba |
   nd_dummy |      Freq.     Percent        Cum.
------------+-----------------------------------
          0 |     21,208       96.29       96.29
          1 |        818        3.71      100.00
------------+-----------------------------------
      Total |     22,026      100.00


 */

** Only the pre-treatment year and the treatment dummy are kept in the
** saved file
keep pre_trt_yr w2_abshusband_dummy

save "DATA FILES TO SHARE/TEMP_FILES/birth_hist.dta", replace



***** 2. PANEL B - LOANS DATA *********

* Load the loans data from the IHDS household dataset

use "DATA FILES TO SHARE/IHDS_RAW/36151-0002-Data.dta", clear

tab DB2A

** there are 22,487 HH who answered this question

** Keep households with a non-negative, non-missing DB2A.
** In Stata every missing value sorts above all numbers, so DB2A >= 0 alone
** would keep missing values. missing() is used instead of == . so that
** extended missing codes (.a to .z) are dropped as well, not only the
** system missing value.
keep if DB2A >= 0 & !missing(DB2A)

*** We are left with 22,487 observations (1 was a valid blank), as reported
*** by the original run

** Now build the household-level ID for these observations: an 11-character
** string made of the zero-padded state, district, PSU, household and
** split-household IDs

sort STATEID DISTID PSUID HHID HHSPLITID

gen str11 uid = string(STATEID,"%02.0f") + string(DISTID,"%02.0f") + string(PSUID,"%02.0f") + string(HHID,"%03.0f") + string(HHSPLITID,"%02.0f")

** dup is 0 when the uid occurs once, otherwise the running position of the
** row within its uid group
quietly bysort uid: gen dup = cond(_N==1,0,_n)

tab dup

** Keep only uids that occur exactly once; every copy of a duplicated uid is
** dropped, which makes uid a valid key for the 1:1 merge below
keep if dup == 0

keep DB2 DB2A DB2B uid

tempfile loans_12
save `loans_12'

** Reload the EW household dataset and attach the loans data to it
use "DATA FILES TO SHARE/main_ew_hh_df.dta", clear

** Keep the second wave only
* NOTE(review): Panel A additionally restricts the sample to
* w1_abshusband_dummy == 0, this panel does not - confirm this is intended
keep if year == 1

** Household-level ID (11 characters, no person component), built the same
** way as in the loans file
gen str11 uid = string(STATEID,"%02.0f") + string(DISTID,"%02.0f") + string(PSUID,"%02.0f") + string(HHID,"%03.0f") + string(HHSPLITID,"%02.0f")

** dup is 0 when the uid occurs once, otherwise the running position of the
** row within its uid group
quietly bysort uid: gen dup = cond(_N==1,0,_n)

tab dup

** Keep only uids that occur exactly once, so households contributing more
** than one row are dropped entirely. The former drop if dup>0 that followed
** this line could never delete anything and has been removed.
keep if dup == 0

merge 1:1 uid using `loans_12'

keep _merge w2_abshusband_dummy DB2A

tab DB2A w2_abshusband_dummy

** Keep only households found in both files
keep if _merge == 3

save "DATA FILES TO SHARE/TEMP_FILES/loans_12.dta", replace


*** Run Figure_A1.R to get graph A1 ***
